###Packages
#install.packages('dismo')
#install.packages('rworldmap')
#install.packages('sf')
#install.packages('geodata')
library(dismo)
library(rworldmap)
library(sf)
library(geodata)
library(raster)
library(scales)
library(rgdal)
library(rgeos)
library(sp)
wrld_simpl<-getMap(resolution = "coarse")
plot(wrld_simpl)
Here, lies the new coordinates of the species’ distribution map after
trimming the coordinates. There was 1 population of Yucca Glauca in
Sweden that was trimmed. Now both maps show similar areas of where the
plants ditributions are.
pairs(bio.values_ymoth[,1:5])
pairs(bio.values_yplant[,1:5])
#BIO1 = Annual Mean Temperature
#BIO2 = Mean Diurnal Range (Mean of monthly (max temp - min temp))
#BIO3 = Isothermality (BIO2/BIO7) (×100)
#BIO4 = Temperature Seasonality (standard deviation ×100)
#BIO5 = Max Temperature of Warmest Month
pairs(bio.values_ymoth[,6:10])
pairs(bio.values_yplant[,6:10])
#BIO6 = Min Temperature of Coldest Month
#BIO7 = Temperature Annual Range (BIO5-BIO6)
#BIO8 = Mean Temperature of Wettest Quarter
#BIO9 = Mean Temperature of Driest Quarter
#BIO10 = Mean Temperature of Warmest Quarter
plot(mask, axes = TRUE, col = "lightgrey",main = "The distribution of Tegeticula Yuccasella against background points",
xlim = c(-125, -70),
ylim = c(25, 55)); box()
points(Yucca_moth.coords,col = "#ff0660", pch = 20, cex = 1)
plot(e, add=TRUE, col='black')
points(bg1,col="black", pch=20)
## Linear model
gm1_5 <- glm(pa1 ~ bio1 + bio2 + bio3 + bio4 + bio5,
family = binomial(link = "logit"), data=envtrain_ym)
summary(gm1_5)
##
## Call:
## glm(formula = pa1 ~ bio1 + bio2 + bio3 + bio4 + bio5, family = binomial(link = "logit"),
## data = envtrain_ym)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.8333 -0.8815 -0.5914 1.1689 2.7082
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.108946 2.893230 -0.383 0.70150
## bio1 -0.172671 0.106932 -1.615 0.10636
## bio2 -0.389223 0.174713 -2.228 0.02589 *
## bio3 -0.032887 0.062381 -0.527 0.59806
## bio4 -0.003075 0.001575 -1.952 0.05088 .
## bio5 0.373337 0.128342 2.909 0.00363 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 888.69 on 720 degrees of freedom
## Residual deviance: 810.18 on 715 degrees of freedom
## AIC: 822.18
##
## Number of Fisher Scoring iterations: 4
The linear model conducted above suggests that the bioclimatic variables that are significant are BIO 2, 4 and 5 (Mean Diurnal Range (Mean of monthly (max temp - min temp)), Temperature Seasonality (standard deviation ×100) and Max Temperature of Warmest Month)) ## Predict species distribution from the model and plot it
## class : ModelEvaluation
## n presences : 221
## n absences : 500
## AUC : 0.71019
## cor : 0.3158905
## max TPR+TNR at : -0.5862825
The evaluation shows that the linear model (gm1_5) is 35% correct when
predicting the presence and absence of Yucca moth populations. To
improve the model, i will use the varaibles that were found to have an
significant impact on the presence and absence of the Yucca moth
population.
gm1_10 <- glm(pa1 ~ bio6 + bio7 + bio8 + bio9 + bio10,
family = binomial(link = "logit"), data=envtrain_ym)
summary(gm1_10)
##
## Call:
## glm(formula = pa1 ~ bio6 + bio7 + bio8 + bio9 + bio10, family = binomial(link = "logit"),
## data = envtrain_ym)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.5287 -0.8999 -0.6561 1.3257 2.1728
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.469549 0.954615 -0.492 0.622809
## bio6 -0.162950 0.075858 -2.148 0.031706 *
## bio7 -0.199343 0.064869 -3.073 0.002119 **
## bio8 -0.001432 0.019430 -0.074 0.941231
## bio9 -0.022123 0.017663 -1.253 0.210378
## bio10 0.275821 0.078234 3.526 0.000423 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 888.69 on 720 degrees of freedom
## Residual deviance: 835.15 on 715 degrees of freedom
## AIC: 847.15
##
## Number of Fisher Scoring iterations: 4
The linear model conducted above suggests that the bioclimatic variables that are significant are BIO 6, 7, 9 and 10. (Min Temperature of Coldest Month, Temperature Annual Range (BIO5-BIO6), Mean Temperature of Driest Quarter and Mean Temperature of Warmest Quarter)
## class : ModelEvaluation
## n presences : 221
## n absences : 500
## AUC : 0.657991
## cor : 0.2651074
## max TPR+TNR at : -0.9161422
The evaluation shows that the linear model (gm1_10) is 27.5% correct
when predicting the presence and absence of Yucca moth populations. To
improve the model, i will use the variables that were found to have an
significant impact on the presence and absence of the Yucca moth
population. These were variables 2,4,5,6,7,9 and 10
AIC(gm1_5,gm1_10)
## df AIC
## gm1_5 6 822.1783
## gm1_10 6 847.1540
evaluate(testpres1, testbackg1, gm1_5)
## class : ModelEvaluation
## n presences : 221
## n absences : 500
## AUC : 0.71019
## cor : 0.3158905
## max TPR+TNR at : -0.5862825
evaluate(testpres1, testbackg1, gm1_10)
## class : ModelEvaluation
## n presences : 221
## n absences : 500
## AUC : 0.657991
## cor : 0.2651074
## max TPR+TNR at : -0.9161422
par(mfrow = c(2,2))
plot(pg1_5, main='GLM probability of occurrence of species 1 associated via BIO1-5')
plot(wrld_simpl, add=TRUE, border='dark grey')
points(Yucca_moth.coords,col = "#660226", pch = 4, cex = 0.5)
plot(pg1_5 > tr1_5, main='presence/absence of Species 1 (1-5)')
plot(wrld_simpl, add=TRUE, border='dark grey')
points(Yucca_moth.coords,col = "#660226", pch = 4, cex = 0.5)
plot(pg1_10, main='GLM probability of occurrence of BIO6-10 on species 1')
plot(wrld_simpl, add=TRUE, border='dark grey')
points(Yucca_moth.coords,col = "#660226", pch = 4, cex = 0.5)
plot(pg1_10 > tr1_10, main='presence/absence species 1 (6-10)')
plot(wrld_simpl, add=TRUE, border='dark grey')
points(Yucca_moth.coords,col = "#660226", pch = 4, cex = 0.5)
gm1_5_2 <- glm(pa1 ~ bio4 + bio5+ bio6 + bio7 + bio9 +bio10,
family = binomial(link = "logit"), data=envtrain_ym)
summary(gm1_5_2)
##
## Call:
## glm(formula = pa1 ~ bio4 + bio5 + bio6 + bio7 + bio9 + bio10,
## family = binomial(link = "logit"), data = envtrain_ym)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.8121 -0.9015 -0.5657 1.1293 2.7215
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.295e+00 1.025e+00 -2.240 0.02507 *
## bio4 1.500e-02 2.596e-03 5.777 7.62e-09 ***
## bio5 -1.107e+05 8.107e+04 -1.366 0.17192
## bio6 1.107e+05 8.107e+04 1.366 0.17192
## bio7 1.107e+05 8.107e+04 1.366 0.17192
## bio9 -1.608e-02 1.503e-02 -1.070 0.28459
## bio10 -3.438e-01 1.252e-01 -2.745 0.00604 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 888.69 on 720 degrees of freedom
## Residual deviance: 797.79 on 714 degrees of freedom
## AIC: 811.79
##
## Number of Fisher Scoring iterations: 4
evaluate(testpres1, testbackg1, gm1_5_2)
## class : ModelEvaluation
## n presences : 221
## n absences : 500
## AUC : 0.7209593
## cor : 0.3348035
## max TPR+TNR at : -0.7625996
AIC(gm1_5_2, gm1_5, gm1_10)
## df AIC
## gm1_5_2 7 811.7887
## gm1_5 6 822.1783
## gm1_10 6 847.1540
These are all the variables that suggested a significant relationship. When i use the variables that had the highest p value (bio 7 and 10), the evaluation statistics show that the model is only 20%. When i use all the varaibles that were significant (bio 4,5,7,9,10), the model is 35% correct. AIC statistics suggest that the model gm1_5 is the best to use (802.7771)
gm2_5 <- glm(pa2 ~ bio1 + bio2 + bio3 + bio4 + bio5,
family = binomial(link = "logit"), data=envtrain_yp)
summary(gm2_5)
##
## Call:
## glm(formula = pa2 ~ bio1 + bio2 + bio3 + bio4 + bio5, family = binomial(link = "logit"),
## data = envtrain_yp)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -4.1268 0.1745 0.2297 0.3321 5.3725
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 24.15260 3.32566 7.262 3.8e-13 ***
## bio1 -0.06762 0.09878 -0.685 0.494
## bio2 2.51517 0.20567 12.229 < 2e-16 ***
## bio3 -1.02230 0.08960 -11.409 < 2e-16 ***
## bio4 -0.02340 0.00184 -12.716 < 2e-16 ***
## bio5 0.04201 0.10981 0.383 0.702
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 3153.8 on 4560 degrees of freedom
## Residual deviance: 1977.3 on 4555 degrees of freedom
## (2 observations deleted due to missingness)
## AIC: 1989.3
##
## Number of Fisher Scoring iterations: 6
The linear model conducted above suggests that the bioclimatic variables that are significant are BIO 2, 3, and 4 in affcting the population of Yucca Glauca (the yucca plant).
## class : ModelEvaluation
## n presences : 4061
## n absences : 500
## AUC : 0.8588247
## cor : 0.5515291
## max TPR+TNR at : 1.076133
The evaluation shows that the linear model (gm2_5) is 56% correct when
predicting the presence and absence of Yucca plant populations. To
improve the model, i will use the variables that were found to have an
significant impact on the presence and absence of the Yucca plant
population.
gm2_10 <- glm(pa2 ~ bio6 + bio7 + bio8 + bio9 + bio10,
family = binomial(link = "logit"), data=envtrain_yp)
summary(gm2_10)
##
## Call:
## glm(formula = pa2 ~ bio6 + bio7 + bio8 + bio9 + bio10, family = binomial(link = "logit"),
## data = envtrain_yp)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.6157 0.1728 0.2092 0.2749 3.5328
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -8.80231 0.95450 -9.222 < 2e-16 ***
## bio6 1.88949 0.07226 26.150 < 2e-16 ***
## bio7 1.52066 0.06385 23.816 < 2e-16 ***
## bio8 0.08395 0.01887 4.448 8.65e-06 ***
## bio9 -0.32569 0.01592 -20.459 < 2e-16 ***
## bio10 -1.56084 0.06861 -22.748 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 3153.8 on 4560 degrees of freedom
## Residual deviance: 1773.3 on 4555 degrees of freedom
## (2 observations deleted due to missingness)
## AIC: 1785.3
##
## Number of Fisher Scoring iterations: 6
All of the bioclimatic variables seem to be a good indicator for the presence and absence of the yucca plant populations
## class : ModelEvaluation
## n presences : 4061
## n absences : 500
## AUC : 0.8986887
## cor : 0.6360773
## max TPR+TNR at : 1.864452
The evaluation shows that the linear model (gm2_10) is 65% correct when
predicting the presence and absence of Yucca plant populations. To
improve the model, i will use the variables that were found to have an
significant impact on the presence and absence of the Yucca plant
population. The linear model (gm_10) is the best for predicting.
AIC(gm2_5,gm2_10)
## df AIC
## gm2_5 6 1989.296
## gm2_10 6 1785.280
evaluate(testpres2, testbackg2, gm2_5)
## class : ModelEvaluation
## n presences : 4061
## n absences : 500
## AUC : 0.8588247
## cor : 0.5515291
## max TPR+TNR at : 1.076133
evaluate(testpres2, testbackg2, gm2_10)
## class : ModelEvaluation
## n presences : 4061
## n absences : 500
## AUC : 0.8986887
## cor : 0.6360773
## max TPR+TNR at : 1.864452
par(mfrow = c(2,2))
plot(pg2_5, main='GLM probability of occurrence of species 2 (BIO 1-5)')
plot(wrld_simpl, add=TRUE, border='darkgrey')
points(new.Yucca_plant.coords,col = "#014c31", pch = 4, cex = 0.5)
plot(pg2_5 > tr2_5, main='presence/absence of species 2 (BIO 1-5)')
plot(wrld_simpl, add=TRUE, border='darkgrey')
points(new.Yucca_plant.coords,col = "#014c31", pch = 4, cex = 0.5)
plot(pg2_10, main='GLM probability of occurrence of species 2 (BIO6-10)')
plot(wrld_simpl, add=TRUE, border = 'darkgrey')
points(new.Yucca_plant.coords,col = "#014c31", pch = 4, cex = 0.5)
plot(pg2_10 > tr2_10, main='presence/absence of species 2 (BIO 6-10)')
plot(wrld_simpl, add=TRUE, border = 'darkgrey')
points(new.Yucca_plant.coords,col = "#014c31", pch = 4, cex =0.5)
# Question 3: Plot the overlap in distribution of the two species.
Devise and calculate a metric for the degree of overlap between their
ranges, explaining how you calculated it.
plot(mask, axes = TRUE, col = "lightyellow", main = "Distrubutions of Tegeticula yuccasella and Yucca glauca",
xlim = c(-125, -70),
ylim = c(25, 55)); box()
points(Yucca_moth.coords,col = alpha("#ff0660", 0.7), pch = 20, cex = 1)
points(new.Yucca_plant.coords, col = alpha("#06FFA5", 0.2), pch = 20, cex = 1)
Yucca_moth_pts <- st_multipoint(yucca_moth_coords_matrix)
yucca_moth_polygon <- st_triangulate(Yucca_moth_pts, bOnlyEdges = FALSE)
plot(yucca_moth_polygon, col = '#ff0660', border = 'pink') #06FFA5 #e6fff6
Yucca_plant_pts <- st_multipoint(yucca_plant_coords_matrix)
yucca_plant_polygon <- st_triangulate(Yucca_plant_pts, bOnlyEdges = FALSE)
plot(yucca_plant_polygon, col = '#06FFA5', border = '#e6fff6') #ff0660
Here above shows a polygon that depicts the spatial area that the
populations create.
st_area(yucca_moth_polygon)
## [1] 648.6076
st_area(yucca_plant_polygon)
## [1] 870.8453
st_is_valid(yucca_moth_polygon)
## [1] TRUE
st_is_valid(yucca_plant_polygon)
## [1] TRUE
#moth_plant_intersection <- st_intersection(yucca_moth_polygon, yucca_plant_polygon)
My code did not enable me to finish with my question on the overlap of the polygons. I have worked out teh area that the population make up but I wasn’t able to find that area of intersection due to some unknown formatting area. I have checked of the polygons are valid and they are, so i wasn’t able to continue.
This question also wasn’t answered. I had no way of combining the tables so that pa1 and pa2 were in the same table to then conduct this analysis. I would predict that the two species had an significant relationship since the two species (yucca moth and yucca plant) are an infamous mutualism relationship. It is also shown on the map with both distributions are overlapping.
#gm <- glm(pa1 ~ pa2,
#family = binomial(link = "logit"), data= envtrain_yp, data = envtrain_ym)
#ym_data <- read.csv("file/path/yucca_moth_data.csv")
#yp_data <- read.csv("file/path//yucca_plant_data.csv")
#Species1 <- "Tegeticula yuccasella"
#Species2 <- "Yucca glauca"
#ym_data$Species <- c(rep(Species1, (nrow(ym_data))))
#yp_data$Species <- c(rep(Species2, (nrow(yp_data))))
#ym_data$BG <- rbind(bg1)
#both_species <- rbind(ym_data, yp_data)
#both_species_env <- cbind(pb_train_ym, pb_train_yp)
future.bio.data<-cmip6_world(model="CanESM5",var="bio",ssp="245",res=10,time="2061-2080",path=getwd())
names(future.bio.data)<-names(bio.data)
future.bio.data<-crop(future.bio.data,e)
plot(pg1_5, main='A) GLM present')
plot(wrld_simpl, add=TRUE, border='dark grey')
points(Yucca_moth.coords,col = "#660226", pch = 4, cex = 0.5)
pg1_5.future <- predict(future.bio.data, gm1_5, ext=e,type="response") #gm and pg change depending what we think is best
pg1_5.future<-crop(pg1_5.future,e)
plot(pg1_5.future, main="B) GLM, 2060-2081")
plot(wrld_simpl, add=TRUE, border='dark grey')
points(Yucca_moth.coords,col = "#660226", pch = 4, cex = 0.5)
#--- ----- ------ ---- --- --
plot(pg2_10, main='A) GLM present')
plot(wrld_simpl, add=TRUE, border='dark grey')
points(new.Yucca_plant.coords,col = "#014c31", pch = 4, cex = 0.5)
pg2_10.future <- predict(future.bio.data, gm2_10, ext=e,type="response") #gm and pg change depending what we think is best
pg2_10.future<-crop(pg2_10.future,e)
plot(pg2_10.future, main="B) GLM, 2060-2081")
plot(wrld_simpl, add=TRUE, border='dark grey')
points(new.Yucca_plant.coords,col = "#014c31", pch = 4, cex = 0.5)
Interesting, the data is showing that in 2060 - 2081, there will be
improvement in the conditions for both teh yucca moth and yucca plant
since there is more area that is green.
predict.YM.localities.now <- extract(pg1_5>=tr1_5, Yucca_moth.coords)[,-1]
sum(predict.YM.localities.now)
## [1] 162
predict.YP.localities.now <- extract(pg2_10>=tr2_10, new.Yucca_plant.coords)[,-1]
predict.YP.localities.now1 <- na.omit(predict.YP.localities.now)
sum(predict.YP.localities.now1)
## [1] 3596
#--- --- -- -- - - - - - -- -- - -- -- - - - ---- ---- -- --- --- ---
predict.YM.localities.future <- extract(pg1_5.future>=tr1_5, Yucca_moth.coords)[,-1]
predict.YM.localities.future1 <- na.omit(predict.YM.localities.future)
sum(predict.YM.localities.future1)
## [1] 197
predict.YP.localities.future <- extract(pg2_10.future>=tr2_10, new.Yucca_plant.coords)[,-1]
predict.YP.localities.future1 <- na.omit(predict.YP.localities.future)
sum(predict.YP.localities.future1)
## [1] 3906
sum((predict.YM.localities.now==0)&(predict.YM.localities.future1)) # in the future but not in the past
## Warning in (predict.YM.localities.now == 0) & (predict.YM.localities.future1):
## longer object length is not a multiple of shorter object length
## [1] 54
sum((predict.YM.localities.now==1)&(predict.YM.localities.future1==0)) # 57
## Warning in (predict.YM.localities.now == 1) & (predict.YM.localities.future1
## == : longer object length is not a multiple of shorter object length
## [1] 6
sum((predict.YP.localities.now1==0)&(predict.YP.localities.future1)) # in the future but not in the past
## Warning in (predict.YP.localities.now1 == 0) & (predict.YP.localities.future1):
## longer object length is not a multiple of shorter object length
## [1] 366
sum((predict.YP.localities.now1==1)&(predict.YP.localities.future1==0)) # 57
## Warning in (predict.YP.localities.now1 == 1) & (predict.YP.localities.future1
## == : longer object length is not a multiple of shorter object length
## [1] 51
The data above suggest further that there will be an increase in population in 2060 - 2018. The yucca moth population increases from 163 to 196 and the plant population increase from 3618 to 3909. Normally when we think of climate change and its impacts we assume a decrease in populations however form what is modeled, we can say that the mutualistic relationship of yucca moth and plant will continue to thrive in the future. There has been extinctions (6 populations and 52 for yucca moth and plant respectively) but there has also been 52 new populations. The differences in the population increase and new population is confusing (52 vs 291). There should be more work done on the prediction of population increase.